# --- Setup ----------------------------------------------------------------
# Modelling, tabulation and plotting libraries used throughout the post.
library(lavaan)
library(dplyr)
library(reticulate)
library(marginaleffects)
library(modelsummary)
library(ggplot2)
library(egg)
library(lme4)
library(semPlot)
library(tinytable)
library(kableExtra)
library(reshape2)
# Point reticulate/RStudio at the author's local conda environments.
options(rstudio.python.installationPath = "/Users/nathanielforde/mambaforge/envs")
# Render all modelsummary tables with the tinytable backend.
options("modelsummary_factory_default" = "tinytable")
# Default notebook plot dimensions.
options(repr.plot.width=15, repr.plot.height=8)
# Allow python chunks in this Quarto document via reticulate.
knitr::knit_engines$set(python = reticulate::eng_python)
# Suppress scientific notation in printed output.
options(scipen=999)
# Fix the RNG so the injected noise below is reproducible.
set.seed(100)
Warning
This is a work in progress. We intend to elaborate on the choices related to analysing and understanding the data elicited through intentional psychometric surveys.
Measurement and Measurement Constructs
Quarto enables you to weave together content and executable code into a finished document. To learn more about Quarto see https://quarto.org.
# Read the survey responses and deliberately inflate the first
# life-satisfaction item (ls_p1) in the 'west' region, so a regional
# difference exists for the hierarchical models to detect.
# (Idiom fix: use `<-` for top-level assignment rather than `=`.)
df <- read.csv('sem_data.csv')
inflate <- df$region == 'west'
# Noise centred at 1 shifts western ls_p1 scores upwards on average.
noise <- rnorm(sum(inflate), 1, 0.3)
df$ls_p1[inflate] <- df$ls_p1[inflate] + noise
# Aggregate outcome measures: simple sum and mean of the three ls items.
df$ls_sum <- df$ls_p1 + df$ls_p2 + df$ls_p3
df$ls_mean <- rowMeans(df[c('ls_p1', 'ls_p2', 'ls_p3')])
head(df) |> kable()| ID | region | gender | age | se_acad_p1 | se_acad_p2 | se_acad_p3 | se_social_p1 | se_social_p2 | se_social_p3 | sup_friends_p1 | sup_friends_p2 | sup_friends_p3 | sup_parents_p1 | sup_parents_p2 | sup_parents_p3 | ls_p1 | ls_p2 | ls_p3 | ls_sum | ls_mean |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | west | female | 13 | 4.857143 | 5.571429 | 4.500000 | 5.80 | 5.500000 | 5.40 | 6.5 | 6.5 | 7.0 | 7.0 | 7.0 | 6.0 | 6.182676 | 6.75 | 5.50 | 18.43268 | 6.144225 |
| 2 | west | male | 14 | 4.571429 | 4.285714 | 4.666667 | 5.00 | 5.500000 | 4.80 | 4.5 | 4.5 | 5.5 | 5.0 | 6.0 | 4.5 | 5.372793 | 5.00 | 4.50 | 14.87279 | 4.957598 |
| 10 | west | female | 14 | 4.142857 | 6.142857 | 5.333333 | 5.20 | 4.666667 | 6.00 | 4.0 | 4.5 | 3.5 | 7.0 | 7.0 | 6.5 | 7.309658 | 5.50 | 4.00 | 16.80966 | 5.603219 |
| 11 | west | female | 14 | 5.000000 | 5.428571 | 4.833333 | 6.40 | 5.833333 | 6.40 | 7.0 | 7.0 | 7.0 | 7.0 | 7.0 | 7.0 | 5.599369 | 6.50 | 6.25 | 18.34937 | 6.116456 |
| 12 | west | female | 14 | 5.166667 | 5.600000 | 4.800000 | 5.25 | 5.400000 | 5.25 | 7.0 | 7.0 | 7.0 | 6.5 | 6.5 | 7.0 | 6.701758 | 6.00 | 5.75 | 18.45176 | 6.150586 |
| 14 | west | male | 14 | 4.857143 | 4.857143 | 4.166667 | 5.20 | 5.000000 | 4.20 | 5.5 | 6.5 | 7.0 | 6.5 | 6.5 | 6.5 | 6.095589 | 5.50 | 5.50 | 17.09559 | 5.698530 |
#| code-fold: true
#| code-summary: "Show the code"
# Variable summary table with the first column colour-coded by row group:
# rows 2:14 (green) = driver items, 15:17 (blue) = life-satisfaction items,
# 18:19 (darker blue) = derived sum/mean scores.
# NOTE(review): row indices are hard-coded against the rendered table order —
# they will silently mis-colour if columns are added to df; verify on re-render.
datasummary_skim(df)|>
style_tt(
# life-satisfaction item rows
i = 15:17,
j = 1:1,
background = "#20AACC",
color = "white",
italic = TRUE) |>
style_tt(
# derived ls_sum / ls_mean rows
i = 18:19,
j = 1:1,
background = "#2888A0",
color = "white",
italic = TRUE) |>
style_tt(
# driver (self-efficacy and support) item rows
i = 2:14,
j = 1:1,
background = "#17C2AD",
color = "white",
italic = TRUE)| Unique | Missing Pct. | Mean | SD | Min | Median | Max | Histogram | |
|---|---|---|---|---|---|---|---|---|
| ID | 283 | 0 | 187.9 | 106.3 | 1.0 | 201.0 | 367.0 | |
| age | 5 | 0 | 14.7 | 0.8 | 13.0 | 15.0 | 17.0 | |
| se_acad_p1 | 32 | 0 | 5.2 | 0.8 | 3.1 | 5.1 | 7.0 | |
| se_acad_p2 | 36 | 0 | 5.3 | 0.7 | 3.1 | 5.4 | 7.0 | |
| se_acad_p3 | 29 | 0 | 5.2 | 0.8 | 2.8 | 5.2 | 7.0 | |
| se_social_p1 | 24 | 0 | 5.3 | 0.8 | 1.8 | 5.4 | 7.0 | |
| se_social_p2 | 27 | 0 | 5.5 | 0.7 | 2.7 | 5.5 | 7.0 | |
| se_social_p3 | 31 | 0 | 5.4 | 0.8 | 3.0 | 5.5 | 7.0 | |
| sup_friends_p1 | 13 | 0 | 5.8 | 1.1 | 1.0 | 6.0 | 7.0 | |
| sup_friends_p2 | 10 | 0 | 6.0 | 0.9 | 2.5 | 6.0 | 7.0 | |
| sup_friends_p3 | 13 | 0 | 6.0 | 1.1 | 1.0 | 6.0 | 7.0 | |
| sup_parents_p1 | 11 | 0 | 6.0 | 1.1 | 1.0 | 6.0 | 7.0 | |
| sup_parents_p2 | 11 | 0 | 5.9 | 1.1 | 2.0 | 6.0 | 7.0 | |
| sup_parents_p3 | 13 | 0 | 5.7 | 1.1 | 1.0 | 6.0 | 7.0 | |
| ls_p1 | 156 | 0 | 5.7 | 1.1 | 2.0 | 5.7 | 8.1 | |
| ls_p2 | 21 | 0 | 5.8 | 0.7 | 2.5 | 5.8 | 7.0 | |
| ls_p3 | 22 | 0 | 5.2 | 0.9 | 2.0 | 5.2 | 7.0 | |
| ls_sum | 218 | 0 | 16.7 | 2.1 | 8.7 | 17.0 | 20.8 | |
| ls_mean | 217 | 0 | 5.6 | 0.7 | 2.9 | 5.7 | 6.9 | |
| N | % | |||||||
| region | east | 142 | 50.2 | |||||
| west | 141 | 49.8 | ||||||
| gender | female | 132 | 46.6 | |||||
| male | 151 | 53.4 |
Fit Initial Regression Models
# Regression formulas of increasing breadth: first item per construct ("1st"),
# first and second items ("12"), then all three items per construct — for both
# the summed (ls_sum) and averaged (ls_mean) life-satisfaction outcomes.
formula_sum_1st = " ls_sum ~ se_acad_p1 + se_social_p1 + sup_friends_p1 + sup_parents_p1"
formula_mean_1st = " ls_mean ~ se_acad_p1 + se_social_p1 + sup_friends_p1 + sup_parents_p1"
formula_sum_12 = " ls_sum ~ se_acad_p1 + se_acad_p2 + se_social_p1 + se_social_p2 +
sup_friends_p1 + sup_friends_p2 + sup_parents_p1 + sup_parents_p2"
formula_mean_12 = " ls_mean ~ se_acad_p1 + se_acad_p2 + se_social_p1 + se_social_p2 +
sup_friends_p1 + sup_friends_p2 + sup_parents_p1 + sup_parents_p2"
formula_sum = " ls_sum ~ se_acad_p1 + se_acad_p2 + se_acad_p3 + se_social_p1 + se_social_p2 + se_social_p3 + sup_friends_p1 + sup_friends_p2 + sup_friends_p3 + sup_parents_p1 + sup_parents_p2 + sup_parents_p3"
formula_mean = " ls_mean ~ se_acad_p1 + se_acad_p2 + se_acad_p3 + se_social_p1 + se_social_p2 + se_social_p3 + sup_friends_p1 + sup_friends_p2 + sup_friends_p3 + sup_parents_p1 + sup_parents_p2 + sup_parents_p3"
# Fit plain OLS models for every formula / outcome combination.
mod_sum = lm(formula_sum, df)
mod_sum_1st = lm(formula_sum_1st, df)
mod_sum_12 = lm(formula_sum_12, df)
mod_mean = lm(formula_mean, df)
mod_mean_1st = lm(formula_mean_1st, df)
mod_mean_12 = lm(formula_mean_12, df)
# Scale a numeric vector linearly onto [0, 1].
#
# x:     numeric vector to rescale.
# na.rm: drop NAs when computing the range (default FALSE, matching the
#        original behavior where any NA makes the whole result NA).
# Returns a vector the same length as x.
# NOTE: a constant vector yields NaN (0 / 0) — callers should guard if that
# can occur in their data.
min_max_norm <- function(x, na.rm = FALSE) {
  # range() computes min and max in a single pass over x.
  rng <- range(x, na.rm = na.rm)
  (x - rng[1]) / (rng[2] - rng[1])
}
# Min-max normalise the indicator columns so coefficients are comparable in
# magnitude across items.
# NOTE(review): column positions 5:19 are hard-coded — assumes those are the
# fifteen survey-item columns; confirm against the CSV layout if it changes.
df_norm <- as.data.frame(lapply(df[c(5:19)], min_max_norm))
# Keep the outcomes on their original scales.
df_norm$ls_sum <- df$ls_sum
df_norm$ls_mean <- df$ls_mean
# Refit the full models on the normalised predictors.
mod_sum_norm = lm(formula_sum, df_norm)
mod_mean_norm = lm(formula_mean, df_norm)
# Group the eight fitted models by outcome so modelsummary can render them
# side by side under "Outcome: sum_score" / "Outcome: mean_score" headers.
models = list(
"Outcome: sum_score" = list("model_sum_1st_factors" = mod_sum_1st,
"model_sum_1st_2nd_factors" = mod_sum_12,
"model_sum_score" = mod_sum,
"model_sum_score_norm" = mod_sum_norm
),
"Outcome: mean_score" = list(
"model_mean_1st_factors" = mod_mean_1st,
"model_mean_1st_2nd_factors" = mod_mean_12,
"model_mean_score"= mod_mean,
"model_mean_score_norm" = mod_mean_norm
)
)#| code-fold: true
#| code-summary: "Show the code"
# Combined table of all eight regressions; shape="cbind" places the two
# outcome groups side by side. Coefficient/SE rows (2:25) are highlighted.
# NOTE(review): row indices are tied to the rendered table layout.
modelsummary(models, stars=TRUE, shape ="cbind") |>
style_tt(
i = 2:25,
j = 1:1,
background = "#17C2AD",
color = "white",
italic = TRUE)| Outcome: sum_score | Outcome: mean_score | |||||||
|---|---|---|---|---|---|---|---|---|
| model_sum_1st_factors | model_sum_1st_2nd_factors | model_sum_score | model_sum_score_norm | model_mean_1st_factors | model_mean_1st_2nd_factors | model_mean_score | model_mean_score_norm | |
| + p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001 | ||||||||
| (Intercept) | 6.307*** | 4.318*** | 3.948*** | 8.797*** | 2.102*** | 1.439*** | 1.316*** | 2.932*** |
| (0.915) | (1.020) | (1.001) | (0.690) | (0.305) | (0.340) | (0.334) | (0.230) | |
| se_acad_p1 | 0.121 | -0.098 | -0.122 | -0.470 | 0.040 | -0.033 | -0.041 | -0.157 |
| (0.148) | (0.186) | (0.202) | (0.779) | (0.049) | (0.062) | (0.067) | (0.260) | |
| se_social_p1 | 1.107*** | 0.663** | 0.540* | 2.807* | 0.369*** | 0.221** | 0.180* | 0.936* |
| (0.163) | (0.212) | (0.210) | (1.091) | (0.054) | (0.071) | (0.070) | (0.364) | |
| sup_friends_p1 | 0.089 | -0.209 | -0.251 | -1.503 | 0.030 | -0.070 | -0.084 | -0.501 |
| (0.088) | (0.137) | (0.158) | (0.945) | (0.029) | (0.046) | (0.053) | (0.315) | |
| sup_parents_p1 | 0.567*** | 0.332* | 0.175 | 1.053 | 0.189*** | 0.111* | 0.058 | 0.351 |
| (0.100) | (0.146) | (0.150) | (0.901) | (0.033) | (0.049) | (0.050) | (0.300) | |
| se_acad_p2 | 0.360+ | 0.352+ | 1.357+ | 0.120+ | 0.117+ | 0.452+ | ||
| (0.204) | (0.212) | (0.817) | (0.068) | (0.071) | (0.272) | |||
| se_social_p2 | 0.563* | 0.364 | 1.575 | 0.188* | 0.121 | 0.525 | ||
| (0.220) | (0.230) | (0.996) | (0.073) | (0.077) | (0.332) | |||
| sup_friends_p2 | 0.333* | 0.331* | 1.487* | 0.111* | 0.110* | 0.496* | ||
| (0.163) | (0.168) | (0.755) | (0.054) | (0.056) | (0.252) | |||
| sup_parents_p2 | 0.267+ | 0.038 | 0.189 | 0.089+ | 0.013 | 0.063 | ||
| (0.143) | (0.151) | (0.755) | (0.048) | (0.050) | (0.252) | |||
| se_acad_p3 | -0.141 | -0.588 | -0.047 | -0.196 | ||||
| (0.183) | (0.762) | (0.061) | (0.254) | |||||
| se_social_p3 | 0.385* | 1.541* | 0.128* | 0.514* | ||||
| (0.168) | (0.674) | (0.056) | (0.225) | |||||
| sup_friends_p3 | 0.110 | 0.657 | 0.037 | 0.219 | ||||
| (0.137) | (0.820) | (0.046) | (0.273) | |||||
| sup_parents_p3 | 0.492*** | 2.953*** | 0.164*** | 0.984*** | ||||
| (0.132) | (0.791) | (0.044) | (0.264) | |||||
| Num.Obs. | 283 | 283 | 283 | 283 | 283 | 283 | 283 | 283 |
| R2 | 0.377 | 0.420 | 0.460 | 0.460 | 0.377 | 0.420 | 0.460 | 0.460 |
| R2 Adj. | 0.368 | 0.403 | 0.436 | 0.436 | 0.368 | 0.403 | 0.436 | 0.436 |
| AIC | 1096.3 | 1084.4 | 1071.9 | 1071.9 | 474.5 | 462.6 | 450.1 | 450.1 |
| BIC | 1118.1 | 1120.9 | 1122.9 | 1122.9 | 496.3 | 499.1 | 501.1 | 501.1 |
| Log.Lik. | -542.137 | -532.214 | -521.943 | -521.943 | -231.230 | -221.306 | -211.035 | -211.035 |
| RMSE | 1.64 | 1.59 | 1.53 | 1.53 | 0.55 | 0.53 | 0.51 | 0.51 |
# Flat (ungrouped) list of the same eight models for a single
# dot-and-whisker coefficient plot via modelplot().
models = list(
"model_sum_1st_factors" = mod_sum_1st,
"model_sum_1st_2nd_factors" = mod_sum_12,
"model_sum_score" = mod_sum,
"model_sum_score_norm" = mod_sum_norm,
"model_mean_1st_factors" = mod_mean_1st,
"model_mean_1st_2nd_factors" = mod_mean_12,
"model_mean_score"= mod_mean,
"model_mean_score_norm" = mod_mean_norm
)
modelplot(models, coef_omit = 'Intercept') + geom_vline(xintercept = 0, linetype="dotted",
color = "black") + ggtitle("Comparing Model Parameter Estimates", "Across Covariates")Significant Coefficients
# Coefficient plots for the full models, coloured by significance at p < 0.001.
# The left panel suppresses its legend (guide = "none") to avoid duplication.
# NOTE(review): scale_color_manual maps values to labels alphabetically here —
# "Not significant" = grey, "Significant" = black; verify if labels change.
g1 = modelplot(mod_sum, coef_omit = 'Intercept') + aes(color = ifelse(p.value < 0.001, "Significant", "Not significant")) +
scale_color_manual(values = c("grey", "black"), guide = "none")
g2 = modelplot(mod_mean, coef_omit = 'Intercept') + aes(color = ifelse(p.value < 0.001, "Significant", "Not significant")) +
scale_color_manual(values = c("grey", "black"))
plot <- ggarrange(g1,g2, ncol=2, nrow=1);Aggregate Driver Scores
# Parcel (mean) scores: average the three items for each driver construct.
df$se_acad_mean <- rowMeans(df[c('se_acad_p1', 'se_acad_p2', 'se_acad_p3')])
df$se_social_mean <- rowMeans(df[c('se_social_p1', 'se_social_p2', 'se_social_p3')])
df$sup_friends_mean <- rowMeans(df[c('sup_friends_p1', 'sup_friends_p2', 'sup_friends_p3')])
df$sup_parents_mean <- rowMeans(df[c('sup_parents_p1', 'sup_parents_p2', 'sup_parents_p3')])
# Regress each outcome on the four parcel scores.
formula_parcel_mean = "ls_mean ~ se_acad_mean + se_social_mean + sup_friends_mean + sup_parents_mean"
formula_parcel_sum = "ls_sum ~ se_acad_mean + se_social_mean + sup_friends_mean + sup_parents_mean"
mod_sum_parcel = lm(formula_parcel_sum, df)
mod_mean_parcel = lm(formula_parcel_mean, df)
# Pair the two parcel models for tabulation.
models_parcel = list("model_sum_score" = mod_sum_parcel,
"model_mean_score"= mod_mean_parcel
)
modelsummary(models_parcel, stars=TRUE)| model_sum_score | model_mean_score | |
|---|---|---|
| + p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001 | ||
| (Intercept) | 4.559*** | 1.520*** |
| (0.972) | (0.324) | |
| se_acad_mean | 0.114 | 0.038 |
| (0.165) | (0.055) | |
| se_social_mean | 1.269*** | 0.423*** |
| (0.183) | (0.061) | |
| sup_friends_mean | 0.075 | 0.025 |
| (0.102) | (0.034) | |
| sup_parents_mean | 0.721*** | 0.240*** |
| (0.104) | (0.035) | |
| Num.Obs. | 283 | 283 |
| R2 | 0.435 | 0.435 |
| R2 Adj. | 0.427 | 0.427 |
| AIC | 1068.9 | 447.1 |
| BIC | 1090.7 | 468.9 |
| Log.Lik. | -528.436 | -217.528 |
| RMSE | 1.57 | 0.52 |
# Coefficient plots for the parcel models, coloured by significance at
# p < 0.001; legend suppressed on the left panel only.
g1 = modelplot(mod_sum_parcel, coef_omit = 'Intercept') + aes(color = ifelse(p.value < 0.001, "Significant", "Not significant")) +
scale_color_manual(values = c("grey", "black"), guide = "none")
g2 = modelplot(mod_mean_parcel, coef_omit = 'Intercept') + aes(color = ifelse(p.value < 0.001, "Significant", "Not significant")) +
scale_color_manual(values = c("grey", "black"))
plot <- ggarrange(g1,g2, ncol=2, nrow=1);Hierarchical Models
# Hierarchical (mixed) models: the full item-level predictor set plus a random
# intercept per region, to absorb the regional shift injected into ls_p1.
# NOTE(review): region has only two levels (east/west), so the random-intercept
# variance is estimated from very few groups — interpret it with care.
formula_hierarchy_mean = "ls_mean ~ sup_parents_p1 + sup_parents_p2 + sup_parents_p3 + sup_friends_p1 + sup_friends_p2 + sup_friends_p3 + se_acad_p1 + se_acad_p2 + se_acad_p3 +
se_social_p1 + se_social_p2 + se_social_p3 + (1 | region)"
formula_hierarchy_sum = "ls_sum ~ sup_parents_p1 + sup_parents_p2 + sup_parents_p3 + sup_friends_p1 + sup_friends_p2 + sup_friends_p3 + se_acad_p1 + se_acad_p2 + se_acad_p3 +
se_social_p1 + se_social_p2 + se_social_p3 + (1 | region)"
# REML estimation for the variance components.
hierarchical_mean_fit <- lmer(formula_hierarchy_mean, data = df, REML = TRUE)
hierarchical_sum_fit <- lmer(formula_hierarchy_sum, data = df, REML = TRUE)
# Fixed-effect coefficient plots (re.form=NA excludes random effects).
g1 = modelplot(hierarchical_mean_fit, re.form=NA) + aes(color = ifelse(p.value < 0.001, "Significant", "Not significant")) +
scale_color_manual(values = c("grey", "black"), guide = "none")
g2 = modelplot(hierarchical_sum_fit, re.form=NA) + aes(color = ifelse(p.value < 0.001, "Significant", "Not significant")) +
scale_color_manual(values = c("grey", "black"))
plot <- ggarrange(g1,g2, ncol=2, nrow=1);Warning: Removed 2 rows containing missing values or values outside the scale range
(`geom_segment()`).
Removed 2 rows containing missing values or values outside the scale range
(`geom_segment()`).
# Tabulate both hierarchical fits side by side, highlighting the
# coefficient/SE rows (2:25) of the rendered table in teal.
# NOTE(review): row indices are tied to the rendered table layout.
modelsummary(list("hierarchical_mean_fit"= hierarchical_mean_fit,
"hierarchical_sum_fit"= hierarchical_sum_fit),
stars = TRUE) |>
style_tt(
i = 2:25,
j = 1:1,
background = "#17C2AD",
color = "white",
italic = TRUE)| hierarchical_mean_fit | hierarchical_sum_fit | |
|---|---|---|
| + p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001 | ||
| (Intercept) | 0.784* | 2.352* |
| (0.377) | (1.132) | |
| sup_parents_p1 | 0.021 | 0.062 |
| (0.048) | (0.145) | |
| sup_parents_p2 | 0.036 | 0.108 |
| (0.048) | (0.145) | |
| sup_parents_p3 | 0.179*** | 0.537*** |
| (0.042) | (0.126) | |
| sup_friends_p1 | -0.084+ | -0.253+ |
| (0.050) | (0.150) | |
| sup_friends_p2 | 0.115* | 0.346* |
| (0.053) | (0.160) | |
| sup_friends_p3 | 0.045 | 0.136 |
| (0.043) | (0.130) | |
| se_acad_p1 | -0.069 | -0.207 |
| (0.064) | (0.193) | |
| se_acad_p2 | 0.123+ | 0.369+ |
| (0.067) | (0.202) | |
| se_acad_p3 | 0.053 | 0.160 |
| (0.061) | (0.184) | |
| se_social_p1 | 0.117+ | 0.352+ |
| (0.068) | (0.203) | |
| se_social_p2 | 0.170* | 0.509* |
| (0.074) | (0.221) | |
| se_social_p3 | 0.149** | 0.447** |
| (0.054) | (0.161) | |
| SD (Intercept region) | 0.248 | 0.744 |
| SD (Observations) | 0.498 | 1.493 |
| Num.Obs. | 283 | 283 |
| R2 Marg. | 0.456 | 0.456 |
| R2 Cond. | 0.564 | 0.564 |
| AIC | 486.1 | 1079.3 |
| BIC | 540.8 | 1134.0 |
| ICC | 0.2 | 0.2 |
| RMSE | 0.49 | 1.46 |
Marginal Effects
# Counterfactual prediction grids over the parental-support items, holding
# other covariates at datagrid's representative defaults.
# pred: hierarchical model, by region, fixed effects only (re.form=NA).
# NOTE(review): pred is not referenced again in the visible code — confirm it
# is used later or drop it.
pred <- predictions(hierarchical_mean_fit, newdata = datagrid(sup_parents_p3 = 1:10, region=c('east', 'west')), re.form=NA)
# pred1: OLS model, varying sup_parents_p2 from 1 to 10.
pred1 <- predictions(mod_mean, newdata = datagrid(sup_parents_p2 = 1:10))
pred1 |> head() |> kableExtra::kable()| rowid | estimate | std.error | statistic | p.value | s.value | conf.low | conf.high | se_acad_p1 | se_acad_p2 | se_acad_p3 | se_social_p1 | se_social_p2 | se_social_p3 | sup_friends_p1 | sup_friends_p2 | sup_friends_p3 | sup_parents_p1 | sup_parents_p3 | sup_parents_p2 | ls_mean |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | 5.501073 | 0.2498601 | 22.01661 | 0 | 354.4488 | 5.011356 | 5.990790 | 5.154955 | 5.346853 | 5.211248 | 5.290047 | 5.477267 | 5.440989 | 5.786219 | 6.010601 | 5.991166 | 5.975265 | 5.719081 | 1 | 6.144225 |
| 2 | 5.513680 | 0.2000338 | 27.56375 | 0 | 553.1635 | 5.121621 | 5.905739 | 5.154955 | 5.346853 | 5.211248 | 5.290047 | 5.477267 | 5.440989 | 5.786219 | 6.010601 | 5.991166 | 5.975265 | 5.719081 | 2 | 6.144225 |
| 3 | 5.526287 | 0.1505320 | 36.71170 | 0 | 977.7205 | 5.231249 | 5.821324 | 5.154955 | 5.346853 | 5.211248 | 5.290047 | 5.477267 | 5.440989 | 5.786219 | 6.010601 | 5.991166 | 5.975265 | 5.719081 | 3 | 6.144225 |
| 4 | 5.538894 | 0.1018295 | 54.39382 | 0 | Inf | 5.339312 | 5.738476 | 5.154955 | 5.346853 | 5.211248 | 5.290047 | 5.477267 | 5.440989 | 5.786219 | 6.010601 | 5.991166 | 5.975265 | 5.719081 | 4 | 6.144225 |
| 5 | 5.551501 | 0.0560496 | 99.04623 | 0 | Inf | 5.441645 | 5.661356 | 5.154955 | 5.346853 | 5.211248 | 5.290047 | 5.477267 | 5.440989 | 5.786219 | 6.010601 | 5.991166 | 5.975265 | 5.719081 | 5 | 6.144225 |
| 6 | 5.564107 | 0.0312544 | 178.02653 | 0 | Inf | 5.502850 | 5.625365 | 5.154955 | 5.346853 | 5.211248 | 5.290047 | 5.477267 | 5.440989 | 5.786219 | 6.010601 | 5.991166 | 5.975265 | 5.719081 | 6 | 6.144225 |
Regression Marginal Effects
# Marginal-effect plots for the OLS model: shift one predictor at a time
# while holding the remaining covariates fixed.
g = plot_predictions(mod_mean, condition = c("sup_parents_p3"), type = "response") + ggtitle("Counterfactual Shift of Outcome: sup_parents_p3", "Holding all else Fixed")
g1 = plot_predictions(mod_mean, condition = c("sup_friends_p1"), type = "response") + ggtitle("Counterfactual Shift of Outcome: sup_friends_p1", "Holding all else Fixed")
g2 = plot_predictions(mod_mean, condition = c("se_acad_p1"),
type = "response") + ggtitle("Counterfactual Shift of Outcome: se_acad_p1", "Holding all else Fixed")
plot <- ggarrange(g,g1,g2, ncol=1, nrow=3);Hierarchical Marginal Effects
# Same marginal-effect plots for the hierarchical model, split by region;
# re.form=NA restricts predictions to the fixed effects.
g = plot_predictions(hierarchical_mean_fit, condition = c("sup_parents_p3", "region"), type = "response", re.form=NA) + ggtitle("Counterfactual Shift of Outcome: sup_parents_p3", "Holding all else Fixed")
g1 = plot_predictions(hierarchical_mean_fit, condition = c("sup_friends_p1", "region"), type = "response", re.form=NA) + ggtitle("Counterfactual Shift of Outcome: sup_friends_p1", "Holding all else Fixed")
g2 = plot_predictions(hierarchical_mean_fit, condition = c("se_acad_p1", "region"),
type = "response", re.form=NA) + ggtitle("Counterfactual Shift of Outcome: se_acad_p1", "Holding all else Fixed")
plot <- ggarrange(g,g1,g2, ncol=1, nrow=3);Confirmatory Factor Analysis
# Confirmatory factor analysis: five latent constructs, each measured by
# three observed items. The lavaan syntax below specifies the measurement
# model only — no structural regressions.
model_measurement <- "
 # Measurement model
SUP_Parents =~ sup_parents_p1 + sup_parents_p2 + sup_parents_p3
SUP_Friends =~ sup_friends_p1 + sup_friends_p2 + sup_friends_p3
SE_Academic =~ se_acad_p1 + se_acad_p2 + se_acad_p3
SE_Social =~ se_social_p1 + se_social_p2 + se_social_p3
LS =~ ls_p1 + ls_p2 + ls_p3
"
# Fit with lavaan's cfa() defaults (first loading per factor fixed to 1
# to set each latent variable's scale).
fit_mod <- cfa(model_measurement, data = df)
modelplot(fit_mod, coef_omit='=~')summary(fit_mod, fit.measures = TRUE, standardized = TRUE)lavaan 0.6-18 ended normally after 50 iterations
Estimator ML
Optimization method NLMINB
Number of model parameters 40
Number of observations 283
Model Test User Model:
Test statistic 208.228
Degrees of freedom 80
P-value (Chi-square) 0.000
Model Test Baseline Model:
Test statistic 2596.891
Degrees of freedom 105
P-value 0.000
User Model versus Baseline Model:
Comparative Fit Index (CFI) 0.949
Tucker-Lewis Index (TLI) 0.932
Loglikelihood and Information Criteria:
Loglikelihood user model (H0) -4366.931
Loglikelihood unrestricted model (H1) -4262.817
Akaike (AIC) 8813.863
Bayesian (BIC) 8959.680
Sample-size adjusted Bayesian (SABIC) 8832.840
Root Mean Square Error of Approximation:
RMSEA 0.075
90 Percent confidence interval - lower 0.063
90 Percent confidence interval - upper 0.088
P-value H_0: RMSEA <= 0.050 0.001
P-value H_0: RMSEA >= 0.080 0.277
Standardized Root Mean Square Residual:
SRMR 0.055
Parameter Estimates:
Standard errors Standard
Information Expected
Information saturated (h1) model Structured
Latent Variables:
Estimate Std.Err z-value P(>|z|) Std.lv Std.all
SUP_Parents =~
sup_parents_p1 1.000 0.937 0.875
sup_parents_p2 1.032 0.055 18.643 0.000 0.967 0.885
sup_parents_p3 0.995 0.059 16.808 0.000 0.932 0.816
SUP_Friends =~
sup_friends_p1 1.000 1.020 0.905
sup_friends_p2 0.794 0.043 18.474 0.000 0.810 0.857
sup_friends_p3 0.892 0.050 17.738 0.000 0.910 0.831
SE_Academic =~
se_acad_p1 1.000 0.697 0.880
se_acad_p2 0.805 0.049 16.284 0.000 0.561 0.819
se_acad_p3 0.951 0.058 16.495 0.000 0.663 0.828
SE_Social =~
se_social_p1 1.000 0.641 0.847
se_social_p2 0.957 0.055 17.297 0.000 0.614 0.880
se_social_p3 0.923 0.066 13.926 0.000 0.592 0.741
LS =~
ls_p1 1.000 0.519 0.487
ls_p2 0.989 0.137 7.235 0.000 0.513 0.705
ls_p3 1.194 0.165 7.225 0.000 0.619 0.702
Covariances:
Estimate Std.Err z-value P(>|z|) Std.lv Std.all
SUP_Parents ~~
SUP_Friends 0.132 0.064 2.070 0.038 0.138 0.138
SE_Academic 0.219 0.046 4.733 0.000 0.336 0.336
SE_Social 0.285 0.046 6.248 0.000 0.474 0.474
LS 0.339 0.057 5.923 0.000 0.698 0.698
SUP_Friends ~~
SE_Academic 0.071 0.047 1.494 0.135 0.100 0.100
SE_Social 0.195 0.046 4.254 0.000 0.298 0.298
LS 0.156 0.044 3.505 0.000 0.295 0.295
SE_Academic ~~
SE_Social 0.274 0.036 7.532 0.000 0.614 0.614
LS 0.178 0.036 4.972 0.000 0.493 0.493
SE_Social ~~
LS 0.263 0.043 6.150 0.000 0.790 0.790
Variances:
Estimate Std.Err z-value P(>|z|) Std.lv Std.all
.sup_parents_p1 0.270 0.037 7.313 0.000 0.270 0.235
.sup_parents_p2 0.259 0.038 6.856 0.000 0.259 0.217
.sup_parents_p3 0.436 0.047 9.201 0.000 0.436 0.334
.sup_friends_p1 0.229 0.040 5.697 0.000 0.229 0.180
.sup_friends_p2 0.237 0.030 7.914 0.000 0.237 0.265
.sup_friends_p3 0.371 0.042 8.807 0.000 0.371 0.309
.se_acad_p1 0.141 0.022 6.481 0.000 0.141 0.225
.se_acad_p2 0.154 0.018 8.653 0.000 0.154 0.329
.se_acad_p3 0.202 0.024 8.405 0.000 0.202 0.315
.se_social_p1 0.162 0.020 8.003 0.000 0.162 0.282
.se_social_p2 0.109 0.016 6.757 0.000 0.109 0.225
.se_social_p3 0.288 0.028 10.138 0.000 0.288 0.452
.ls_p1 0.866 0.078 11.071 0.000 0.866 0.763
.ls_p2 0.267 0.030 8.996 0.000 0.267 0.504
.ls_p3 0.395 0.044 9.044 0.000 0.395 0.507
SUP_Parents 0.878 0.098 8.940 0.000 1.000 1.000
SUP_Friends 1.041 0.111 9.397 0.000 1.000 1.000
SE_Academic 0.485 0.054 8.912 0.000 1.000 1.000
SE_Social 0.411 0.049 8.472 0.000 1.000 1.000
LS 0.269 0.068 3.958 0.000 1.000 1.000
semPlot::semPaths(fit_mod, whatLabels = 'std', intercepts = FALSE)Comparing the empirical and implied variance-covariance matrix
# Visual model-fit check: heatmap of the standardized residual covariances
# (sample covariance minus model-implied covariance); values near 0 indicate
# the measurement model reproduces the observed covariances well.
heat_df = data.frame(resid(fit_mod, type = "standardized")$cov)
# Melt the square matrix to long (x, y, value) form for ggplot.
heat_df = heat_df |> as.matrix() |> melt()
colnames(heat_df) <- c("x", "y", "value")
# Round for legible cell labels, then draw a diverging-colour tile map.
heat_df |> mutate(value = round(value, 2)) |> ggplot(aes(x = x, y = y, fill = value)) +
geom_tile() + geom_text(aes(label = value), color = "black", size = 4) +
scale_fill_gradient2(
high = 'dodgerblue4',
mid = 'white',
low = 'firebrick2'
) + theme(axis.text.x = element_text(angle=45)) + ggtitle("Residuals of the Sample Covariances and Model Implied Covariances", "A Visual Check of Model fit")Structural Equation Models
# Full structural equation model: the measurement model from the CFA above
# plus a structural regression of life satisfaction (LS) on the two
# self-efficacy and two support constructs.
model <- "
# Measurement model
 SUP_Parents =~ sup_parents_p1 + sup_parents_p2 + sup_parents_p3
 SUP_Friends =~ sup_friends_p1 + sup_friends_p2 + sup_friends_p3
 SE_Academic =~ se_acad_p1 + se_acad_p2 + se_acad_p3
 SE_Social =~ se_social_p1 + se_social_p2 + se_social_p3
 LS =~ ls_p1 + ls_p2 + ls_p3
# Structural model
 # Regressions
 LS ~ SE_Academic + SE_Social + SUP_Parents + SUP_Friends
 # Residual covariances
 SE_Academic ~~ SE_Social
"
# BUG FIX: the original called cfa(model_measurement, ...) here, silently
# refitting the measurement-only model and ignoring the structural
# specification defined just above. sem() fits `model` as intended (cfa()
# and sem() share the same defaults in lavaan).
fit_mod_sem <- sem(model, data = df)
modelplot(fit_mod, coef_omit='=~')```
Citation
BibTeX citation:
@online{forde,
author = {Nathaniel Forde},
title = {Confirmatory {Factor} {Analysis} and {Structural} {Equation}
{Models}},
langid = {en}
}
For attribution, please cite this work as:
Nathaniel Forde. n.d. “Confirmatory Factor Analysis and Structural
Equation Models.”